import os
from bs4 import BeautifulSoup

# from lxml

# folders = ['E:/books/oss_local/READER_NEW/20220210/11624566中国古医籍整理丛书·第一辑（全20册）'
#     , 'E:/books/oss_local/READER_NEW/20220210/11624567中国古医籍整理丛书·第二辑（全20册）'
#     , 'E:/books/oss_local/READER_NEW/20220210/11624568中国古医籍整理丛书·第三辑（全20册）'
#     , 'E:/books/oss_local/READER_NEW/20220210/11624570中国古医籍整理丛书·第五辑（全20册）'
#     , 'E:/books/oss_local/READER_NEW/20220210/11624571中国古医籍整理丛书（六）'
#     , 'E:/books/oss_local/READER_NEW/20220210/11624572中国古医籍整理丛书（七）'
#            ]
# Base path (kept with its trailing slash) that the commented-out folder
# lists in this file prepend to their 'READER_NEW/...' entries via plain
# string concatenation.
doc_root = "E:/books/oss_local/"


# result = "1011.html".find('~')
# print(result)




# get_txt_from_reader_folders()
# get_chars_from_folder_txt_file('D:/ocr/ocr_resouces/中医古籍')
# get_chars_from_folder_txt_file('D:/workspace/python/train_data/train_data_configs/words/古诗数据集')
# get_folder_name(out_root)

# test_folders = [doc_root + 'READER_NEW/20220210/11899981曹颖甫医著大成'
#                 ]
# genContent(test_folders)

# folders = [
#     doc_root + 'READER_NEW/20220210/11899981曹颖甫医著大成',
#     doc_root + 'READER_NEW/20220210/11899982费伯雄医著大成',
#     doc_root + 'READER_NEW/20220210/11899997恽铁樵医著大成',
#     doc_root + 'READER_NEW/20220210/11899998张锡纯医著大成',
#     doc_root + 'READER_NEW/20220210/11899999丁甘仁医著大成',
#     doc_root + 'READER_NEW/2023/03/29/12873131'
# ]
# out_root = 'D:/workspace/python/train_data/train_data_configs/words/医著大成/'

# folders = [doc_root + 'READER_NEW/20211028/12196472南怀瑾著作全集']
#
# out_root = 'D:/workspace/python/train_data/train_data_configs/words/南怀瑾/'
#
# get_txt_from_reader_folders(folders)
